#!/usr/bin/env python

__license__   = 'GPL v3'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en'

'''
lxml based OPF parser.
'''

import copy
import functools
import glob
import io
import json
import os
import re
import sys
import uuid
from contextlib import suppress
from urllib.parse import urlparse

from lxml import etree

from calibre import guess_type, prints
from calibre.constants import __appname__, __version__, filesystem_encoding
from calibre.ebooks import escape_xpath_attr
from calibre.ebooks.metadata import MetaInformation, check_isbn, string_to_authors
from calibre.ebooks.metadata.book.base import Metadata
from calibre.ebooks.metadata.toc import TOC
from calibre.ebooks.metadata.utils import parse_opf
from calibre.ebooks.metadata.utils import pretty_print_opf as _pretty_print
from calibre.utils.cleantext import clean_ascii_chars, clean_xml_chars
from calibre.utils.config import tweaks
from calibre.utils.date import isoformat, parse_date
from calibre.utils.icu import lower as icu_lower
from calibre.utils.icu import upper as icu_upper
from calibre.utils.localization import canonicalize_lang, get_lang
from calibre.utils.xml_parse import safe_xml_fromstring
from polyglot.urllib import unquote

pretty_print_opf = False


class PrettyPrint:

    def __enter__(self):
        global pretty_print_opf
        pretty_print_opf = True

    def __exit__(self, *args):
        global pretty_print_opf
        pretty_print_opf = False


pretty_print = PrettyPrint()


class Resource:  # {{{

    '''
    Represents a resource (usually a file on the filesystem or a URL pointing
    to the web. Such resources are commonly referred to in OPF files.

    They have the interface:

    :member:`path`
    :member:`mime_type`
    :method:`href`
    '''

    def __init__(self, href_or_path, basedir=os.getcwd(), is_path=True):
        self.orig = href_or_path
        self._href = None
        self._basedir = basedir
        self.path = None
        self.fragment = ''
        try:
            self.mime_type = guess_type(href_or_path)[0]
        except Exception:
            self.mime_type = None
        if self.mime_type is None:
            self.mime_type = 'application/octet-stream'
        if is_path:
            path = href_or_path
            if not os.path.isabs(path):
                path = os.path.abspath(os.path.join(basedir, path))
            if isinstance(path, bytes):
                path = path.decode(filesystem_encoding)
            self.path = path
        else:
            url = urlparse(href_or_path)
            if url[0] not in ('', 'file'):
                self._href = href_or_path
            else:
                pc = url[2]
                if isinstance(pc, str):
                    pc = pc.encode('utf-8')
                pc = pc.decode('utf-8')
                self.path = os.path.abspath(os.path.join(basedir, pc.replace('/', os.sep)))
                self.fragment = url[-1]

    def href(self, basedir=None):
        '''
        Return a URL pointing to this resource. If it is a file on the filesystem
        the URL is relative to `basedir`.

        `basedir`: If None, the basedir of this resource is used (see :method:`set_basedir`).
        If this resource has no basedir, then the current working directory is used as the basedir.
        '''
        if basedir is None:
            if self._basedir:
                basedir = self._basedir
            else:
                basedir = os.getcwd()
        if self.path is None:
            return self._href
        frag = ('#' + self.fragment) if self.fragment else ''
        if self.path == basedir:
            return frag
        try:
            rpath = os.path.relpath(self.path, basedir)
        except ValueError:  # On windows path and basedir could be on different drives
            rpath = self.path
        if isinstance(rpath, bytes):
            rpath = rpath.decode(filesystem_encoding)
        return rpath.replace(os.sep, '/')+frag

    def set_basedir(self, path):
        self._basedir = path

    def basedir(self):
        return self._basedir

    def __repr__(self):
        return f'Resource({self.path!r}, {self.href()!r})'

# }}}


class ResourceCollection:  # {{{

    def __init__(self):
        self._resources = []

    def __iter__(self):
        yield from self._resources

    def __len__(self):
        return len(self._resources)

    def __getitem__(self, index):
        return self._resources[index]

    def __bool__(self):
        return len(self._resources) > 0

    def __str__(self):
        resources = map(repr, self)
        return '[{}]'.format(', '.join(resources))
    __unicode__ = __str__

    def __repr__(self):
        return str(self)

    def append(self, resource):
        if not isinstance(resource, Resource):
            raise ValueError('Can only append objects of type Resource')
        self._resources.append(resource)

    def remove(self, resource):
        self._resources.remove(resource)

    def replace(self, start, end, items):
        'Same as list[start:end] = items'
        self._resources[start:end] = items

    @staticmethod
    def from_directory_contents(top, topdown=True):
        collection = ResourceCollection()
        for spec in os.walk(top, topdown=topdown):
            path = os.path.abspath(os.path.join(spec[0], spec[1]))
            res = Resource.from_path(path)
            res.set_basedir(top)
            collection.append(res)
        return collection

    def set_basedir(self, path):
        for res in self:
            res.set_basedir(path)

# }}}


class ManifestItem(Resource):  # {{{

    @staticmethod
    def from_opf_manifest_item(item, basedir):
        href = item.get('href', None)
        if href:
            res = ManifestItem(href, basedir=basedir, is_path=True)
            mt = item.get('media-type', '').strip()
            if mt:
                res.mime_type = mt
            return res

    @property
    def media_type(self):
        return self.mime_type

    @media_type.setter
    def media_type(self, val):
        self.mime_type = val

    def __unicode__representation__(self):
        return f'<item id="{self.id}" href="{self.href()}" media-type="{self.media_type}" />'

    __str__ = __unicode__representation__

    def __repr__(self):
        return str(self)

    def __getitem__(self, index):
        if index == 0:
            return self.href()
        if index == 1:
            return self.media_type
        raise IndexError(f'{index} out of bounds.')

# }}}


class Manifest(ResourceCollection):  # {{{

    def append_from_opf_manifest_item(self, item, dir):
        self.append(ManifestItem.from_opf_manifest_item(item, dir))
        id = item.get('id', '')
        if not id:
            id = f'id{self.next_id}'
        self[-1].id = id
        self.next_id += 1

    @staticmethod
    def from_opf_manifest_element(items, dir):
        m = Manifest()
        for item in items:
            try:
                m.append_from_opf_manifest_item(item, dir)
            except ValueError:
                continue
        return m

    @staticmethod
    def from_paths(entries):
        '''
        `entries`: List of (path, mime-type) If mime-type is None it is autodetected
        '''
        m = Manifest()
        for path, mt in entries:
            mi = ManifestItem(path, is_path=True)
            if mt:
                mi.mime_type = mt
            mi.id = f'id{m.next_id}'
            m.next_id += 1
            m.append(mi)
        return m

    def add_item(self, path, mime_type=None):
        mi = ManifestItem(path, is_path=True)
        if mime_type:
            mi.mime_type = mime_type
        mi.id = f'id{self.next_id}'
        self.next_id += 1
        self.append(mi)
        return mi.id

    def __init__(self):
        ResourceCollection.__init__(self)
        self.next_id = 1

    def item(self, id):
        for i in self:
            if i.id == id:
                return i

    def id_for_path(self, path):
        path = os.path.normpath(os.path.abspath(path))
        for i in self:
            if i.path and os.path.normpath(i.path) == path:
                return i.id

    def path_for_id(self, id):
        for i in self:
            if i.id == id:
                return i.path

    def type_for_id(self, id):
        for i in self:
            if i.id == id:
                return i.mime_type

# }}}


class Spine(ResourceCollection):  # {{{

    class Item(Resource):

        def __init__(self, idfunc, *args, **kwargs):
            Resource.__init__(self, *args, **kwargs)
            self.is_linear = True
            self.id = idfunc(self.path)
            self.idref = None

        def __repr__(self):
            return f'Spine.Item(path={self.path!r}, id={self.id}, is_linear={self.is_linear})'

    @staticmethod
    def from_opf_spine_element(itemrefs, manifest):
        s = Spine(manifest)
        seen = set()
        path_map = {i.id:i.path for i in s.manifest}
        for itemref in itemrefs:
            idref = itemref.get('idref', None)
            if idref is not None:
                path = path_map.get(idref)
                if path and path not in seen:
                    r = Spine.Item(lambda x:idref, path, is_path=True)
                    r.is_linear = itemref.get('linear', 'yes') == 'yes'
                    r.idref = idref
                    s.append(r)
                    seen.add(path)
        return s

    @staticmethod
    def from_paths(paths, manifest):
        s = Spine(manifest)
        for path in paths:
            try:
                s.append(Spine.Item(s.manifest.id_for_path, path, is_path=True))
            except Exception:
                continue
        return s

    def __init__(self, manifest):
        ResourceCollection.__init__(self)
        self.manifest = manifest

    def replace(self, start, end, ids):
        '''
        Replace the items between start (inclusive) and end (not inclusive) with
        with the items identified by ids. ids can be a list of any length.
        '''
        items = []
        for id in ids:
            path = self.manifest.path_for_id(id)
            if path is None:
                raise ValueError('id %s not in manifest')
            items.append(Spine.Item(lambda x: id, path, is_path=True))
        ResourceCollection.replace(start, end, items)

    def linear_items(self):
        for r in self:
            if r.is_linear:
                yield r.path

    def nonlinear_items(self):
        for r in self:
            if not r.is_linear:
                yield r.path

    def items(self):
        for i in self:
            yield i.path

# }}}


class Guide(ResourceCollection):  # {{{

    class Reference(Resource):

        @staticmethod
        def from_opf_resource_item(ref, basedir):
            title, href, type = ref.get('title', ''), ref.get('href'), ref.get('type')
            res = Guide.Reference(href, basedir, is_path=True)
            res.title = title
            res.type = type
            return res

        def __repr__(self):
            ans = f'<reference type="{self.type}" href="{self.href()}" '
            if self.title:
                ans += f'title="{self.title}" '
            return ans + '/>'

    @staticmethod
    def from_opf_guide(references, base_dir=os.getcwd()):
        coll = Guide()
        for ref in references:
            try:
                ref = Guide.Reference.from_opf_resource_item(ref, base_dir)
                coll.append(ref)
            except Exception:
                continue
        return coll

    def set_cover(self, path):
        for i in tuple(self):
            if 'cover' in i.type.lower():
                self.remove(i)
        for typ in ('cover', 'other.ms-coverimage-standard', 'other.ms-coverimage'):
            self.append(Guide.Reference(path, is_path=True))
            self[-1].type = typ
            self[-1].title = ''

# }}}


class MetadataField:

    def __init__(self, name, is_dc=True, formatter=None, none_is=None,
            renderer=lambda x: str(x)):
        self.name      = name
        self.is_dc     = is_dc
        self.formatter = formatter
        self.none_is   = none_is
        self.renderer  = renderer

    def __real_get__(self, obj, type=None):
        ans = obj.get_metadata_element(self.name)
        if ans is None:
            return None
        ans = obj.get_text(ans)
        if ans is None:
            return ans
        if self.formatter is not None:
            try:
                ans = self.formatter(ans)
            except Exception:
                return None
        if hasattr(ans, 'strip'):
            ans = ans.strip()
        return ans

    def __get__(self, obj, type=None):
        ans = self.__real_get__(obj, type)
        if ans is None:
            ans = self.none_is
        return ans

    def __set__(self, obj, val):
        elem = obj.get_metadata_element(self.name)
        if val is None:
            if elem is not None:
                elem.getparent().remove(elem)
            return
        if elem is None:
            elem = obj.create_metadata_element(self.name, is_dc=self.is_dc)
        obj.set_text(elem, self.renderer(val))


class LinkMapsField:

    def __get__(self, obj, type=None):
        ans = obj.get_metadata_element('link_maps')
        if ans is not None:
            ans = obj.get_text(ans)
            if ans:
                with suppress(Exception):
                    return json.loads(ans)
        ans = obj.get_metadata_element('author_link_map')
        if ans is not None:
            ans = obj.get_text(ans)
            if ans:
                with suppress(Exception):
                    return {'authors': json.loads(ans)}
        return {}

    def __set__(self, obj, val):
        elem = obj.get_metadata_element('author_link_map')
        if elem is not None:
            elem.getparent().remove(elem)
        elem = obj.get_metadata_element('link_maps')
        if not val:
            if elem is not None:
                elem.getparent().remove(elem)
            return
        if elem is None:
            elem = obj.create_metadata_element('link_maps', is_dc=False)
        obj.set_text(elem, dump_dict(val))


class TitleSortField(MetadataField):

    def __get__(self, obj, type=None):
        c = self.__real_get__(obj, type)
        if c is None:
            matches = obj.title_path(obj.metadata)
            if matches:
                for match in matches:
                    ans = match.get('{{{}}}file-as'.format(obj.NAMESPACES['opf']), None)
                    if not ans:
                        ans = match.get('file-as', None)
                    if ans:
                        c = ans
        if not c:
            c = self.none_is
        else:
            c = c.strip()
        return c

    def __set__(self, obj, val):
        MetadataField.__set__(self, obj, val)
        matches = obj.title_path(obj.metadata)
        if matches:
            for match in matches:
                for attr in list(match.attrib):
                    if attr.endswith('file-as'):
                        del match.attrib[attr]


def serialize_user_metadata(metadata_elem, all_user_metadata, tail='\n'+(' '*8)):
    from calibre.ebooks.metadata.book.json_codec import encode_is_multiple, object_to_unicode
    from calibre.utils.config import to_json

    for name, fm in all_user_metadata.items():
        try:
            fm = copy.copy(fm)
            if (fm.get('datatype', 'text') == 'composite'
              and not fm.get('display', {}).get('composite_store_template_value_in_opf', True)):
                fm['#value#'] = ''
            encode_is_multiple(fm)
            fm = object_to_unicode(fm)
            fm = json.dumps(fm, default=to_json, ensure_ascii=False)
        except Exception:
            prints('Failed to write user metadata:', name)
            import traceback
            traceback.print_exc()
            continue
        meta = metadata_elem.makeelement('meta')
        meta.set('name', 'calibre:user_metadata:'+name)
        meta.set('content', fm)
        meta.tail = tail
        metadata_elem.append(meta)


def serialize_annotations(metadata_elem, annotations, tail='\n'+(' '*8)):
    for item in annotations:
        data = json.dumps(item, ensure_ascii=False)
        if isinstance(data, bytes):
            data = data.decode('utf-8')
        meta = metadata_elem.makeelement('meta')
        meta.set('name', 'calibre:annotation')
        meta.set('content', data)
        meta.tail = tail
        metadata_elem.append(meta)


def dump_dict(cats):
    if not cats:
        cats = {}
    from calibre.ebooks.metadata.book.json_codec import object_to_unicode
    return json.dumps(object_to_unicode(cats), ensure_ascii=False,
            skipkeys=True)


XPATH_NS = {
    'dc' : 'http://purl.org/dc/elements/1.1/',
    'opf': 'http://www.idpf.org/2007/opf',
    're' : 'http://exslt.org/regular-expressions'
}
XPath = functools.partial(etree.XPath, namespaces=XPATH_NS)


class OPF:  # {{{

    MIMETYPE         = 'application/oebps-package+xml'
    NAMESPACES       = {
                        None: 'http://www.idpf.org/2007/opf',
                        'dc': 'http://purl.org/dc/elements/1.1/',
                        'opf': 'http://www.idpf.org/2007/opf',
                       }
    META             = '{{{}}}meta'.format(NAMESPACES['opf'])
    CONTENT          = XPath('self::*[re:match(name(), "meta$", "i")]/@content')
    TEXT             = XPath('string()')

    metadata_path   = XPath('descendant::*[re:match(name(), "metadata", "i")]')
    metadata_elem_path = XPath(
        'descendant::*[re:match(name(), concat($name, "$"), "i") or (re:match(name(), "meta$", "i") '
        'and re:match(@name, concat("^calibre:", $name, "$"), "i"))]')
    title_path      = XPath('descendant::*[re:match(name(), "title", "i")]')
    authors_path    = XPath('descendant::*[re:match(name(), "creator", "i") and (@role="aut" or @opf:role="aut" or (not(@role) and not(@opf:role)))]')
    editors_path    = XPath('descendant::*[re:match(name(), "creator", "i") and (@role="edt" or @opf:role="edt")]')
    bkp_path        = XPath('descendant::*[re:match(name(), "contributor", "i") and (@role="bkp" or @opf:role="bkp")]')
    tags_path       = XPath('descendant::*[re:match(name(), "subject", "i")]')
    isbn_path       = XPath('descendant::*[re:match(name(), "identifier", "i") and '
                            '(re:match(@scheme, "isbn", "i") or re:match(@opf:scheme, "isbn", "i"))]')
    pubdate_path    = XPath('descendant::*[re:match(name(), "date", "i")]')
    raster_cover_path = XPath('descendant::*[re:match(name(), "meta", "i") and '
            're:match(@name, "cover", "i") and @content]')
    guide_cover_path = XPath('descendant::*[local-name()="guide"]/*[local-name()="reference" and re:match(@type, "cover", "i")]/@href')
    identifier_path = XPath('descendant::*[re:match(name(), "identifier", "i")]')
    application_id_path = XPath('descendant::*[re:match(name(), "identifier", "i") and '
                            '(re:match(@opf:scheme, "calibre|libprs500", "i") or re:match(@scheme, "calibre|libprs500", "i"))]')
    uuid_id_path    = XPath('descendant::*[re:match(name(), "identifier", "i") and '
                            '(re:match(@opf:scheme, "uuid", "i") or re:match(@scheme, "uuid", "i"))]')
    languages_path  = XPath('descendant::*[local-name()="language"]')

    manifest_path   = XPath('descendant::*[re:match(name(), "manifest", "i")]/*[re:match(name(), "item", "i")]')
    manifest_ppath  = XPath('descendant::*[re:match(name(), "manifest", "i")]')
    spine_path      = XPath('descendant::*[re:match(name(), "spine", "i")]/*[re:match(name(), "itemref", "i")]')
    guide_path      = XPath('descendant::*[re:match(name(), "guide", "i")]/*[re:match(name(), "reference", "i")]')

    publisher       = MetadataField('publisher')
    comments        = MetadataField('description')
    category        = MetadataField('type')
    rights          = MetadataField('rights')
    series          = MetadataField('series', is_dc=False)
    if tweaks['use_series_auto_increment_tweak_when_importing']:
        series_index    = MetadataField('series_index', is_dc=False,
                                        formatter=float, none_is=None)
    else:
        series_index    = MetadataField('series_index', is_dc=False,
                                        formatter=float, none_is=1)
    title_sort      = TitleSortField('title_sort', is_dc=False)
    rating          = MetadataField('rating', is_dc=False, formatter=float)
    publication_type = MetadataField('publication_type', is_dc=False)
    timestamp       = MetadataField('timestamp', is_dc=False,
                                    formatter=parse_date, renderer=isoformat)
    user_categories = MetadataField('user_categories', is_dc=False,
                                    formatter=json.loads,
                                    renderer=dump_dict)
    link_maps = LinkMapsField()

    def __init__(self, stream, basedir=os.getcwd(), unquote_urls=True,
            populate_spine=True, try_to_guess_cover=False, preparsed_opf=None, read_toc=True):
        self.try_to_guess_cover = try_to_guess_cover
        self.basedir  = self.base_dir = basedir
        self.path_to_html_toc = self.html_toc_fragment = None
        self.root = parse_opf(stream) if preparsed_opf is None else preparsed_opf
        try:
            self.package_version = float(self.root.get('version', None))
        except (AttributeError, TypeError, ValueError):
            self.package_version = 0
        self.metadata = self.metadata_path(self.root)
        if not self.metadata:
            self.metadata = [self.root.makeelement('{http://www.idpf.org/2007/opf}metadata')]
            self.root.insert(0, self.metadata[0])
            self.metadata[0].tail = '\n'
        self.metadata      = self.metadata[0]
        if unquote_urls:
            self.unquote_urls()
        self.manifest = Manifest()
        m = self.manifest_path(self.root)
        if m:
            self.manifest = Manifest.from_opf_manifest_element(m, basedir)
        self.spine = None
        s = self.spine_path(self.root)
        if populate_spine and s:
            self.spine = Spine.from_opf_spine_element(s, self.manifest)
        self.guide = None
        guide = self.guide_path(self.root)
        self.guide = Guide.from_opf_guide(guide, basedir) if guide else None
        self.cover_data = (None, None)
        if read_toc:
            self.find_toc()
        else:
            self.toc = None
        self.read_user_metadata()

    def read_user_metadata(self):
        self._user_metadata_ = {}
        temp = Metadata('x', ['x'])
        from calibre.ebooks.metadata.book.json_codec import decode_is_multiple
        from calibre.utils.config import from_json
        elems = self.root.xpath('//*[name() = "meta" and starts-with(@name,'
                '"calibre:user_metadata:") and @content]')
        for elem in elems:
            name = elem.get('name')
            name = ':'.join(name.split(':')[2:])
            if not name or not name.startswith('#'):
                continue
            fm = elem.get('content')
            try:
                fm = json.loads(fm, object_hook=from_json)
                decode_is_multiple(fm)
                temp.set_user_metadata(name, fm)
            except Exception:
                prints('Failed to read user metadata:', name)
                import traceback
                traceback.print_exc()
                continue
        self._user_metadata_ = temp.get_all_user_metadata(True)

    def to_book_metadata(self):
        if self.package_version >= 3.0:
            from calibre.ebooks.metadata.opf3 import read_metadata
            return read_metadata(self.root)
        # avoid deepcopy of non-metadata items
        manifest, spine, guide, toc = self.manifest, self.spine, self.guide, self.toc
        self.manifest = self.spine = self.guide = self.toc = None
        try:
            ans = MetaInformation(self)
        finally:
            self.manifest, self.spine, self.guide, self.toc = manifest, spine, guide, toc
        for n, v in self._user_metadata_.items():
            ans.set_user_metadata(n, v)

        ans.set_identifiers(self.get_identifiers())
        ans.link_maps = self.link_maps
        ans.cover = self.cover  # needed because we nuke the guide while creating ans

        return ans

    def read_annotations(self):
        for elem in self.root.xpath('//*[name() = "meta" and @name = "calibre:annotation" and @content]'):
            try:
                yield json.loads(elem.get('content'))
            except Exception:
                pass

    def write_user_metadata(self):
        elems = self.root.xpath('//*[name() = "meta" and starts-with(@name,'
                '"calibre:user_metadata:") and @content]')
        for elem in elems:
            elem.getparent().remove(elem)
        serialize_user_metadata(self.metadata,
                self._user_metadata_)

    def find_toc(self):
        self.toc = None
        try:
            spine = XPath('descendant::*[re:match(name(), "spine", "i")]')(self.root)
            toc = None
            if spine:
                spine = spine[0]
                toc = spine.get('toc', None)
            if toc is None and self.guide:
                for item in self.guide:
                    if item.type and item.type.lower() == 'toc':
                        toc = item.path
            if toc is None:
                for item in self.manifest:
                    if 'toc' in item.href().lower():
                        toc = item.path
            if toc is None:
                return
            self.toc = TOC(base_path=self.base_dir)
            is_ncx = getattr(self, 'manifest', None) is not None and \
                     self.manifest.type_for_id(toc) is not None and \
                     'dtbncx' in self.manifest.type_for_id(toc)
            if is_ncx or toc.lower() in ('ncx', 'ncxtoc'):
                path = self.manifest.path_for_id(toc)
                if path:
                    self.toc.read_ncx_toc(path)
                else:
                    f = glob.glob(os.path.join(self.base_dir, '*.ncx'))
                    if f:
                        self.toc.read_ncx_toc(f[0])
            else:
                self.path_to_html_toc, self.html_toc_fragment = \
                    toc.partition('#')[0], toc.partition('#')[-1]
                if not os.access(self.path_to_html_toc, os.R_OK) or \
                        not os.path.isfile(self.path_to_html_toc):
                    self.path_to_html_toc = None
                self.toc.read_html_toc(toc)
        except Exception:
            pass

    def get_text(self, elem):
        return ''.join(self.CONTENT(elem) or self.TEXT(elem))

    def set_text(self, elem, content):
        if elem.tag == self.META:
            elem.attrib['content'] = content
        else:
            elem.text = content

    def itermanifest(self):
        return self.manifest_path(self.root)

    def create_manifest_item(self, href, media_type, append=False):
        ids = {i.get('id', None) for i in self.itermanifest()}
        manifest_id = 'id1'
        c = 1
        while manifest_id in ids:
            c += 1
            manifest_id = f'id{c}'
        if not media_type:
            media_type = 'application/xhtml+xml'
        ans = etree.Element('{{{}}}item'.format(self.NAMESPACES['opf']),
                             attrib={'id':manifest_id, 'href':href, 'media-type':media_type})
        ans.tail = '\n\t\t'
        if append:
            manifest = self.manifest_ppath(self.root)[0]
            manifest.append(ans)
        return ans

    def replace_manifest_item(self, item, items):
        items = [self.create_manifest_item(*i) for i in items]
        for i, item2 in enumerate(items):
            item2.set('id', item.get('id')+f'.{i + 1}')
        manifest = item.getparent()
        index = manifest.index(item)
        manifest[index:index+1] = items
        return [i.get('id') for i in items]

    def iterspine(self):
        return self.spine_path(self.root)

    def spine_items(self):
        for item in self.iterspine():
            idref = item.get('idref', '')
            for x in self.itermanifest():
                if x.get('id', None) == idref:
                    yield x.get('href', '')

    def first_spine_item(self):
        items = self.iterspine()
        if not items:
            return None
        idref = items[0].get('idref', '')
        for x in self.itermanifest():
            if x.get('id', None) == idref:
                return x.get('href', None)

    def create_spine_item(self, idref):
        ans = etree.Element('{{{}}}itemref'.format(self.NAMESPACES['opf']), idref=idref)
        ans.tail = '\n\t\t'
        return ans

    def replace_spine_items_by_idref(self, idref, new_idrefs):
        items = list(map(self.create_spine_item, new_idrefs))
        spine = XPath('/opf:package/*[re:match(name(), "spine", "i")]')(self.root)[0]
        old = [i for i in self.iterspine() if i.get('idref', None) == idref]
        for x in old:
            i = spine.index(x)
            spine[i:i+1] = items

    def create_guide_element(self):
        e = etree.SubElement(self.root, '{{{}}}guide'.format(self.NAMESPACES['opf']))
        e.text = '\n        '
        e.tail = '\n'
        return e

    def remove_guide(self):
        self.guide = None
        for g in self.root.xpath('./*[re:match(name(), "guide", "i")]', namespaces={'re':'http://exslt.org/regular-expressions'}):
            self.root.remove(g)

    def create_guide_item(self, type, title, href):
        e = etree.Element('{{{}}}reference'.format(self.NAMESPACES['opf']),
                             type=type, title=title, href=href)
        e.tail='\n'
        return e

    def add_guide_item(self, type, title, href):
        g = self.root.xpath('./*[re:match(name(), "guide", "i")]', namespaces={'re':'http://exslt.org/regular-expressions'})[0]
        g.append(self.create_guide_item(type, title, href))

    def iterguide(self):
        return self.guide_path(self.root)

    def unquote_urls(self):
        def get_href(item):
            raw = unquote(item.get('href', ''))
            if not isinstance(raw, str):
                raw = raw.decode('utf-8')
            return raw
        for item in self.itermanifest():
            item.set('href', get_href(item))
        for item in self.iterguide():
            item.set('href', get_href(item))

    @property
    def title(self):
        # TODO: Add support for EPUB 3 refinements

        for elem in self.title_path(self.metadata):
            title = self.get_text(elem)
            if title and title.strip():
                return re.sub(r'\s+', ' ', title.strip())

    @title.setter
    def title(self, val):
        val = (val or '').strip()
        titles = self.title_path(self.metadata)
        if self.package_version < 3:
            # EPUB 3 allows multiple title elements containing sub-titles,
            # series and other things. We all loooove EPUB 3.
            for title in titles:
                title.getparent().remove(title)
            titles = ()
        if val:
            title = titles[0] if titles else self.create_metadata_element('title')
            title.text = re.sub(r'\s+', ' ', str(val))

    @property
    def authors(self):
        ans = []
        for elem in self.authors_path(self.metadata):
            ans.extend(string_to_authors(self.get_text(elem)))
        if not ans:
            for elem in self.editors_path(self.metadata):
                ans.extend(string_to_authors(self.get_text(elem)))
        return ans

    @authors.setter
    def authors(self, val):
        remove = list(self.authors_path(self.metadata)) or list(self.editors_path(self.metadata))
        for elem in remove:
            elem.getparent().remove(elem)
        # Ensure new author element is at the top of the list
        # for broken implementations that always use the first
        # <dc:creator> element with no attention to the role
        for author in reversed(val):
            elem = self.metadata.makeelement('{{{}}}creator'.format(self.NAMESPACES['dc']), nsmap=self.NAMESPACES)
            elem.tail = '\n'
            self.metadata.insert(0, elem)
            elem.set('{{{}}}role'.format(self.NAMESPACES['opf']), 'aut')
            self.set_text(elem, author.strip())

    @property
    def author_sort(self):
        matches = self.authors_path(self.metadata) or self.editors_path(self.metadata)
        if matches:
            for match in matches:
                ans = match.get('{{{}}}file-as'.format(self.NAMESPACES['opf'])) or match.get('file-as')
                if ans:
                    return ans

    @author_sort.setter
    def author_sort(self, val):
        matches = self.authors_path(self.metadata) or self.editors_path(self.metadata)
        if matches:
            for key in matches[0].attrib:
                if key.endswith('file-as'):
                    matches[0].attrib.pop(key)
            matches[0].set('{{{}}}file-as'.format(self.NAMESPACES['opf']), str(val))

    @property
    def tags(self):
        ans = []
        for tag in self.tags_path(self.metadata):
            text = self.get_text(tag)
            if text and text.strip():
                ans.extend([x.strip() for x in text.split(',')])
        return ans

    @tags.setter
    def tags(self, val):
        for tag in list(self.tags_path(self.metadata)):
            tag.getparent().remove(tag)
        for tag in val:
            elem = self.create_metadata_element('subject')
            self.set_text(elem, str(tag))

    @property
    def pubdate(self):
        ans = None
        for match in self.pubdate_path(self.metadata):
            try:
                val = parse_date(etree.tostring(match, encoding='unicode',
                    method='text', with_tail=False).strip())
            except Exception:
                continue
            if ans is None or val < ans:
                ans = val
        return ans

    @pubdate.setter
    def pubdate(self, val):
        least_val = least_elem = None
        for match in self.pubdate_path(self.metadata):
            try:
                cval = parse_date(etree.tostring(match, encoding='unicode',
                    method='text', with_tail=False).strip())
            except Exception:
                match.getparent().remove(match)
            else:
                if not val:
                    match.getparent().remove(match)
                if least_val is None or cval < least_val:
                    least_val, least_elem = cval, match

        if val:
            if least_val is None:
                least_elem = self.create_metadata_element('date')

            least_elem.attrib.clear()
            least_elem.text = isoformat(val)

    @property
    def isbn(self):
        for match in self.isbn_path(self.metadata):
            return self.get_text(match) or None

    @isbn.setter
    def isbn(self, val):
        uuid_id = None
        for attr in self.root.attrib:
            if attr.endswith('unique-identifier'):
                uuid_id = self.root.attrib[attr]
                break

        matches = self.isbn_path(self.metadata)
        if not val:
            for x in matches:
                xid = x.get('id', None)
                is_package_identifier = uuid_id is not None and uuid_id == xid
                if is_package_identifier:
                    self.set_text(x, str(uuid.uuid4()))
                    for attr in x.attrib:
                        if attr.endswith('scheme'):
                            x.attrib[attr] = 'uuid'
                else:
                    x.getparent().remove(x)
            return
        if not matches:
            attrib = {'{{{}}}scheme'.format(self.NAMESPACES['opf']): 'ISBN'}
            matches = [self.create_metadata_element('identifier',
                                                    attrib=attrib)]
        self.set_text(matches[0], str(val))

    def get_identifiers(self):
        identifiers = {}
        schemeless = []
        for x in XPath(
            'descendant::*[local-name() = "identifier" and text()]')(
                    self.metadata):
            found_scheme = False
            for attr, val in x.attrib.items():
                if attr.endswith('scheme'):
                    typ = icu_lower(val)
                    val = etree.tostring(x, with_tail=False, encoding='unicode',
                            method='text').strip()
                    if val and typ not in ('calibre', 'uuid'):
                        if typ == 'isbn' and val.lower().startswith('urn:isbn:'):
                            val = val[len('urn:isbn:'):]
                        identifiers[typ] = val
                    found_scheme = True
                    break
            if not found_scheme:
                val = etree.tostring(x, with_tail=False, encoding='unicode',
                            method='text').strip()
                if val.lower().startswith('urn:isbn:'):
                    val = check_isbn(val.split(':')[-1])
                    if val is not None:
                        identifiers['isbn'] = val
                else:
                    schemeless.append(val)

        if schemeless and 'isbn' not in identifiers:
            for val in schemeless:
                if check_isbn(val, simple_sanitize=True) is not None:
                    identifiers['isbn'] = check_isbn(val)
                    break

        return identifiers

    def set_identifiers(self, identifiers):
        identifiers = identifiers.copy()
        uuid_id = None
        for attr in self.root.attrib:
            if attr.endswith('unique-identifier'):
                uuid_id = self.root.attrib[attr]
                break

        for x in XPath(
            'descendant::*[local-name() = "identifier"]')(
                    self.metadata):
            xid = x.get('id', None)
            is_package_identifier = uuid_id is not None and uuid_id == xid
            typ = {val.lower() for attr, val in x.attrib.items() if attr.endswith('scheme')}
            if is_package_identifier:
                typ = tuple(typ)
                if typ and typ[0] in identifiers:
                    self.set_text(x, identifiers.pop(typ[0]))
                continue
            if typ and not (typ & {'calibre', 'uuid'}):
                x.getparent().remove(x)

        for typ, val in identifiers.items():
            attrib = {'{{{}}}scheme'.format(self.NAMESPACES['opf']): typ.upper()}
            self.set_text(self.create_metadata_element(
                'identifier', attrib=attrib), str(val))

    @property
    def application_id(self):
        for match in self.application_id_path(self.metadata):
            return self.get_text(match) or None

    @application_id.setter
    def application_id(self, val):
        removed_ids = set()
        for x in tuple(self.application_id_path(self.metadata)):
            removed_ids.add(x.get('id', None))
            x.getparent().remove(x)

        uuid_id = None
        for attr in self.root.attrib:
            if attr.endswith('unique-identifier'):
                uuid_id = self.root.attrib[attr]
                break
        attrib = {'{{{}}}scheme'.format(self.NAMESPACES['opf']): 'calibre'}
        if uuid_id and uuid_id in removed_ids:
            attrib['id'] = uuid_id
        self.set_text(self.create_metadata_element(
            'identifier', attrib=attrib), str(val))

    @property
    def uuid(self):
        for match in self.uuid_id_path(self.metadata):
            return self.get_text(match) or None

    @uuid.setter
    def uuid(self, val):
        matches = self.uuid_id_path(self.metadata)
        if not matches:
            attrib = {'{{{}}}scheme'.format(self.NAMESPACES['opf']): 'uuid'}
            matches = [self.create_metadata_element('identifier',
                                                    attrib=attrib)]
        self.set_text(matches[0], str(val))

    @property
    def language(self):
        ans = self.languages
        if ans:
            return ans[0]

    @language.setter
    def language(self, val):
        self.languages = [val]

    @property
    def languages(self):
        ans = []
        for match in self.languages_path(self.metadata):
            t = self.get_text(match)
            if t and t.strip():
                l = canonicalize_lang(t.strip())
                if l:
                    ans.append(l)
        return ans

    @languages.setter
    def languages(self, val):
        matches = self.languages_path(self.metadata)
        for x in matches:
            x.getparent().remove(x)

        num_done = 0
        for lang in val:
            l = self.create_metadata_element('language')
            self.set_text(l, str(lang))
            num_done += 1
        if num_done == 0:
            l = self.create_metadata_element('language')
            self.set_text(l, 'und')

    @property
    def raw_languages(self):
        for match in self.languages_path(self.metadata):
            t = self.get_text(match)
            if t and t.strip():
                yield t.strip()

    @property
    def book_producer(self):
        for match in self.bkp_path(self.metadata):
            return self.get_text(match) or None

    @book_producer.setter
    def book_producer(self, val):
        matches = self.bkp_path(self.metadata)
        if not matches:
            matches = [self.create_metadata_element('contributor')]
            matches[0].set('{{{}}}role'.format(self.NAMESPACES['opf']), 'bkp')
        self.set_text(matches[0], str(val))

    def identifier_iter(self):
        yield from self.identifier_path(self.metadata)

    @property
    def raw_unique_identifier(self):
        uuid_elem = None
        for attr in self.root.attrib:
            if attr.endswith('unique-identifier'):
                uuid_elem = self.root.attrib[attr]
                break
        if uuid_elem:
            matches = self.root.xpath(f'//*[@id={escape_xpath_attr(uuid_elem)}]')
            if matches:
                for m in matches:
                    raw = m.text
                    if raw:
                        return raw

    @property
    def unique_identifier(self):
        raw = self.raw_unique_identifier
        if raw:
            return raw.rpartition(':')[-1]

    @property
    def page_progression_direction(self):
        spine = XPath('descendant::*[re:match(name(), "spine", "i")][1]')(self.root)
        if spine:
            for k, v in spine[0].attrib.items():
                if k == 'page-progression-direction' or k.endswith('}page-progression-direction'):
                    return v

    @property
    def primary_writing_mode(self):
        for m in XPath('//*[local-name()="meta" and @name="primary-writing-mode" and @content]')(self.root):
            return m.get('content')

    @property
    def epub3_raster_cover(self):
        for item in self.itermanifest():
            props = set((item.get('properties') or '').lower().split())
            if 'cover-image' in props:
                mt = item.get('media-type', '')
                if mt and 'xml' not in mt and 'html' not in mt:
                    return item.get('href', None)

    @property
    def raster_cover(self):
        covers = self.raster_cover_path(self.metadata)
        if covers:
            cover_id = covers[0].get('content')
            for item in self.itermanifest():
                if item.get('id', None) == cover_id:
                    mt = item.get('media-type', '')
                    if mt and 'xml' not in mt and 'html' not in mt:
                        return item.get('href', None)
            for item in self.itermanifest():
                if item.get('href', None) == cover_id:
                    mt = item.get('media-type', '')
                    if mt and 'xml' not in mt and 'html' not in mt:
                        return item.get('href', None)
        elif self.package_version >= 3.0:
            return self.epub3_raster_cover

    @property
    def guide_raster_cover(self):
        covers = self.guide_cover_path(self.root)
        if covers:
            mt_map = {i.get('href'):i for i in self.itermanifest()}
            for href in covers:
                if href:
                    i = mt_map.get(href)
                    if i is not None:
                        iid, mt = i.get('id'), i.get('media-type')
                        if iid and mt and mt.lower() in {'image/png', 'image/jpeg', 'image/jpg', 'image/gif'}:
                            return i

    @property
    def epub3_nav(self):
        if self.package_version >= 3.0:
            for item in self.itermanifest():
                props = (item.get('properties') or '').lower().split()
                if 'nav' in props:
                    mt = item.get('media-type') or ''
                    if 'html' in mt.lower():
                        mid = item.get('id')
                        if mid:
                            path = self.manifest.path_for_id(mid)
                            if path and os.path.exists(path):
                                return path

    @property
    def cover(self):
        if self.guide is not None:
            for t in ('cover', 'other.ms-coverimage-standard', 'other.ms-coverimage'):
                for item in self.guide:
                    if item.type and item.type.lower() == t:
                        return item.path

    @cover.setter
    def cover(self, path):
        if self.guide is not None:
            self.guide.set_cover(path)
            for item in list(self.iterguide()):
                if 'cover' in item.get('type', ''):
                    item.getparent().remove(item)

        else:
            g = self.create_guide_element()
            self.guide = Guide()
            self.guide.set_cover(path)
            etree.SubElement(g, 'opf:reference', nsmap=self.NAMESPACES,
                                attrib={'type':'cover', 'href':self.guide[-1].href()})
        id = self.manifest.id_for_path(self.cover)
        if id is None:
            for t in ('cover', 'other.ms-coverimage-standard', 'other.ms-coverimage'):
                for item in self.guide:
                    if item.type.lower() == t:
                        self.create_manifest_item(item.href(), guess_type(path)[0])

    def get_metadata_element(self, name):
        matches = self.metadata_elem_path(self.metadata, name=name)
        if matches:
            return matches[-1]

    def create_metadata_element(self, name, attrib=None, is_dc=True):
        if is_dc:
            name = '{{{}}}{}'.format(self.NAMESPACES['dc'], name)
        else:
            attrib = attrib or {}
            attrib['name'] = 'calibre:' + name
            name = '{{{}}}{}'.format(self.NAMESPACES['opf'], 'meta')
        nsmap = dict(self.NAMESPACES)
        del nsmap['opf']
        elem = etree.SubElement(self.metadata, name, attrib=attrib,
                                nsmap=nsmap)
        elem.tail = '\n'
        return elem

    def render(self, encoding='utf-8'):
        for meta in self.raster_cover_path(self.metadata):
            # Ensure that the name attribute occurs before the content
            # attribute. Needed for Nooks.
            a = meta.attrib
            c = a.get('content', None)
            if c is not None:
                del a['content']
                a['content'] = c
        # The PocketBook requires calibre:series_index to come after
        # calibre:series or it fails to read series info
        # We swap attributes instead of elements, as that avoids namespace
        # re-declarations
        smap = {}
        for child in self.metadata.xpath('./*[@name="calibre:series" or @name="calibre:series_index"]'):
            smap[child.get('name')] = (child, self.metadata.index(child))
        if len(smap) == 2 and smap['calibre:series'][1] > smap['calibre:series_index'][1]:
            s, si = smap['calibre:series'][0], smap['calibre:series_index'][0]

            def swap(attr):
                t = s.get(attr, '')
                s.set(attr, si.get(attr, '')), si.set(attr, t)
            swap('name'), swap('content')

        self.write_user_metadata()
        if pretty_print_opf:
            _pretty_print(self.root)
        raw = etree.tostring(self.root, encoding=encoding, pretty_print=True)
        if not raw.lstrip().startswith(b'<?xml '):
            raw = (f'<?xml version="1.0"  encoding="{encoding.upper()}"?>\n').encode('ascii') + raw
        return raw

    def smart_update(self, mi, replace_metadata=False, apply_null=False):
        for attr in ('title', 'authors', 'author_sort', 'title_sort',
                     'publisher', 'series', 'series_index', 'rating',
                     'isbn', 'tags', 'category', 'comments', 'book_producer',
                     'pubdate', 'user_categories', 'link_maps'):
            val = getattr(mi, attr, None)
            if attr == 'rating' and val:
                val = float(val)
            is_null = val is None or val in ((), [], (None, None), {}) or (attr == 'rating' and (not val or val < 0.1))
            if is_null:
                if apply_null and attr in {'series', 'tags', 'isbn', 'comments', 'publisher', 'rating'}:
                    setattr(self, attr, ([] if attr == 'tags' else None))
            else:
                setattr(self, attr, val)
        langs = getattr(mi, 'languages', [])
        if langs == ['und']:
            langs = []
        if apply_null or langs:
            self.languages = langs or []
        temp = self.to_book_metadata()
        temp.remove_stale_user_metadata(mi)
        temp.smart_update(mi, replace_metadata=replace_metadata)
        if not replace_metadata and callable(getattr(temp, 'custom_field_keys', None)):
            # We have to replace non-null fields regardless of the value of
            # replace_metadata to match the behavior of the builtin fields
            # above.
            for x in temp.custom_field_keys():
                meta = temp.get_user_metadata(x, make_copy=True)
                if meta is None:
                    continue
                if meta['datatype'] == 'text' and meta['is_multiple']:
                    val = mi.get(x, [])
                    if val or apply_null:
                        temp.set(x, val)
                elif meta['datatype'] in {'int', 'float', 'bool'}:
                    missing = object()
                    val = mi.get(x, missing)
                    if val is missing:
                        if apply_null:
                            temp.set(x, None)
                    elif apply_null or val is not None:
                        temp.set(x, val)
                elif apply_null and mi.is_null(x) and not temp.is_null(x):
                    temp.set(x, None)

        self._user_metadata_ = temp.get_all_user_metadata(True)

# }}}


class OPFCreator(Metadata):

    def __init__(self, base_path, other):
        '''
        Initialize.
        @param base_path: An absolute path to the folder in which this OPF file
        will eventually be. This is used by the L{create_manifest} method
        to convert paths to files into relative paths.
        '''
        Metadata.__init__(self, title='', other=other)
        self.base_path = os.path.abspath(base_path)
        self.page_progression_direction = None
        self.primary_writing_mode = None
        if self.application_id is None:
            self.application_id = str(uuid.uuid4())
        if not isinstance(self.toc, TOC):
            self.toc = None
        if not self.authors:
            self.authors = [_('Unknown')]
        if self.guide is None:
            self.guide = Guide()
        if self.cover:
            self.guide.set_cover(self.cover)

    def create_manifest(self, entries):
        '''
        Create <manifest>

        `entries`: List of (path, mime-type) If mime-type is None it is autodetected
        '''
        entries = [x if os.path.isabs(x[0]) else
                      (os.path.abspath(os.path.join(self.base_path, x[0])), x[1]) for x in entries]
        self.manifest = Manifest.from_paths(entries)
        self.manifest.set_basedir(self.base_path)

    def create_manifest_from_files_in(self, files_and_dirs,
            exclude=lambda x:False):
        entries = []

        def dodir(dir):
            for spec in os.walk(dir):
                root, files = spec[0], spec[-1]
                for name in files:
                    path = os.path.join(root, name)
                    if os.path.isfile(path) and not exclude(path):
                        entries.append((path, None))

        for i in files_and_dirs:
            if os.path.isdir(i):
                dodir(i)
            else:
                entries.append((i, None))

        self.create_manifest(entries)

    def create_spine(self, entries):
        '''
        Create the <spine> element. Must first call :method:`create_manifest`.

        `entries`: List of paths
        '''
        entries = [x if os.path.isabs(x) else
                      os.path.abspath(os.path.join(self.base_path, x)) for x in entries]
        self.spine = Spine.from_paths(entries, self.manifest)

    def set_toc(self, toc):
        '''
        Set the toc. You must call :method:`create_spine` before calling this
        method.

        :param toc: A :class:`TOC` object
        '''
        self.toc = toc

    def create_guide(self, guide_element):
        self.guide = Guide.from_opf_guide(guide_element, self.base_path)
        self.guide.set_basedir(self.base_path)

    def render(self, opf_stream=sys.stdout, ncx_stream=None,
               ncx_manifest_entry=None, encoding=None, process_guide=None):
        if encoding is None:
            encoding = 'utf-8'
        toc = getattr(self, 'toc', None)
        if self.manifest:
            self.manifest.set_basedir(self.base_path)
            if ncx_manifest_entry is not None and toc is not None:
                if not os.path.isabs(ncx_manifest_entry):
                    ncx_manifest_entry = os.path.join(self.base_path, ncx_manifest_entry)
                remove = [i for i in self.manifest if i.id == 'ncx']
                for item in remove:
                    self.manifest.remove(item)
                self.manifest.append(ManifestItem(ncx_manifest_entry, self.base_path))
                self.manifest[-1].id = 'ncx'
                self.manifest[-1].mime_type = 'application/x-dtbncx+xml'
        if self.guide is None:
            self.guide = Guide()
        if self.cover:
            cover = self.cover
            if not os.path.isabs(cover):
                cover = os.path.abspath(os.path.join(self.base_path, cover))
            self.guide.set_cover(cover)
        self.guide.set_basedir(self.base_path)

        # Actual rendering
        from lxml.builder import ElementMaker

        from calibre.ebooks.oeb.base import CALIBRE_NS, DC11_NS, OPF2_NS
        DNS = OPF2_NS+'___xx___'
        E = ElementMaker(namespace=DNS, nsmap={None:DNS})
        M = ElementMaker(namespace=DNS,
                nsmap={'dc':DC11_NS, 'calibre':CALIBRE_NS, 'opf':OPF2_NS})
        DC = ElementMaker(namespace=DC11_NS)

        def DC_ELEM(tag, text, dc_attrs={}, opf_attrs={}):
            if text:
                elem = getattr(DC, tag)(clean_ascii_chars(text), **dc_attrs)
            else:
                elem = getattr(DC, tag)(**dc_attrs)
            for k, v in opf_attrs.items():
                elem.set(f'{{{OPF2_NS}}}{k}', v)
            return elem

        def CAL_ELEM(name, content):
            return M.meta(name=name, content=content)

        metadata = M.metadata()
        a = metadata.append
        role = {}
        a(DC_ELEM('title', self.title if self.title else _('Unknown'),
            opf_attrs=role))
        for i, author in enumerate(self.authors):
            fa = {'role':'aut'}
            if i == 0 and self.author_sort:
                fa['file-as'] = self.author_sort
            a(DC_ELEM('creator', author, opf_attrs=fa))
        a(DC_ELEM('contributor', '{} ({}) [{}]'.format(__appname__, __version__,
            'https://calibre-ebook.com'), opf_attrs={'role':'bkp',
                'file-as':__appname__}))
        a(DC_ELEM('identifier', str(self.application_id),
            opf_attrs={'scheme':__appname__},
            dc_attrs={'id':__appname__+'_id'}))
        if getattr(self, 'pubdate', None) is not None:
            a(DC_ELEM('date', self.pubdate.isoformat()))
        langs = self.languages
        if not langs or langs == ['und']:
            langs = [get_lang().replace('_', '-').partition('-')[0]]
        for lang in langs:
            a(DC_ELEM('language', lang))
        if self.comments:
            a(DC_ELEM('description', self.comments))
        if self.publisher:
            a(DC_ELEM('publisher', self.publisher))
        for key, val in self.get_identifiers().items():
            a(DC_ELEM('identifier', val, opf_attrs={'scheme':icu_upper(key)}))
        if self.rights:
            a(DC_ELEM('rights', self.rights))
        if self.tags:
            for tag in self.tags:
                a(DC_ELEM('subject', tag))
        if self.series:
            a(CAL_ELEM('calibre:series', self.series))
            if self.series_index is not None:
                a(CAL_ELEM('calibre:series_index', self.format_series_index()))
        if self.title_sort:
            a(CAL_ELEM('calibre:title_sort', self.title_sort))
        if self.rating is not None:
            a(CAL_ELEM('calibre:rating', str(self.rating)))
        if self.timestamp is not None:
            a(CAL_ELEM('calibre:timestamp', self.timestamp.isoformat()))
        if self.publication_type is not None:
            a(CAL_ELEM('calibre:publication_type', self.publication_type))
        if self.user_categories:
            from calibre.ebooks.metadata.book.json_codec import object_to_unicode
            a(CAL_ELEM('calibre:user_categories',
                       json.dumps(object_to_unicode(self.user_categories))))
        if self.primary_writing_mode:
            a(M.meta(name='primary-writing-mode', content=self.primary_writing_mode))
        manifest = E.manifest()
        if self.manifest is not None:
            for ref in self.manifest:
                href = ref.href()
                if isinstance(href, bytes):
                    href = href.decode('utf-8')
                item = E.item(id=str(ref.id), href=href)
                item.set('media-type', ref.mime_type)
                manifest.append(item)
        spine = E.spine()
        if self.toc is not None:
            spine.set('toc', 'ncx')
        if self.page_progression_direction is not None:
            spine.set('page-progression-direction', self.page_progression_direction)
        if self.spine is not None:
            for ref in self.spine:
                if ref.id is not None:
                    spine.append(E.itemref(idref=ref.id))
        guide = E.guide()
        if self.guide is not None:
            for ref in self.guide:
                href = ref.href()
                if isinstance(href, bytes):
                    href = href.decode('utf-8')
                item = E.reference(type=ref.type, href=href)
                if ref.title:
                    item.set('title', ref.title)
                guide.append(item)
        if process_guide is not None:
            process_guide(E, guide)

        serialize_user_metadata(metadata, self.get_all_user_metadata(False))

        root = E.package(
                metadata,
                manifest,
                spine,
                guide
        )
        root.set('unique-identifier', __appname__+'_id')
        root.set('version', '2.0')
        raw = etree.tostring(root, pretty_print=True, xml_declaration=True,
                encoding=encoding)
        raw = raw.replace(DNS.encode('utf-8'), OPF2_NS.encode('utf-8'))
        opf_stream.write(raw)
        opf_stream.flush()
        if toc is not None and ncx_stream is not None:
            toc.render(ncx_stream, self.application_id)
            ncx_stream.flush()


def metadata_to_opf(mi, as_string=True, default_lang=None):
    import textwrap

    from lxml import etree

    from calibre.ebooks.oeb.base import DC, OPF

    if not mi.application_id:
        mi.application_id = str(uuid.uuid4())

    if not mi.uuid:
        mi.uuid = str(uuid.uuid4())

    if not mi.book_producer:
        mi.book_producer = __appname__ + f' ({__version__}) ' + '[https://calibre-ebook.com]'

    if not mi.languages:
        lang = (get_lang().replace('_', '-').partition('-')[0] if default_lang
                is None else default_lang)
        mi.languages = [lang]

    root = safe_xml_fromstring(textwrap.dedent(
    '''
    <package xmlns="http://www.idpf.org/2007/opf" unique-identifier="uuid_id" version="2.0">
        <metadata xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:opf="http://www.idpf.org/2007/opf">
            <dc:identifier opf:scheme="{a}" id="{a}_id">{id}</dc:identifier>
            <dc:identifier opf:scheme="uuid" id="uuid_id">{uuid}</dc:identifier>
            </metadata>
        <guide/>
    </package>
    '''.format(**dict(a=__appname__, id=mi.application_id, uuid=mi.uuid))))
    metadata = root[0]
    guide = root[1]
    metadata[0].tail = '\n'+(' '*8)

    def factory(tag, text=None, sort=None, role=None, scheme=None, name=None,
            content=None):
        attrib = {}
        if sort:
            attrib[OPF('file-as')] = sort
        if role:
            attrib[OPF('role')] = role
        if scheme:
            attrib[OPF('scheme')] = scheme
        if name:
            attrib['name'] = name
        if content:
            attrib['content'] = content
        try:
            elem = metadata.makeelement(tag, attrib=attrib)
        except ValueError:
            elem = metadata.makeelement(tag, attrib={k:clean_xml_chars(v) for k, v in attrib.items()})
        elem.tail = '\n'+(' '*8)
        if text:
            try:
                elem.text = text.strip()
            except ValueError:
                elem.text = clean_ascii_chars(text.strip())
        metadata.append(elem)

    factory(DC('title'), mi.title)
    for au in mi.authors:
        factory(DC('creator'), au, mi.author_sort, 'aut')
    factory(DC('contributor'), mi.book_producer, __appname__, 'bkp')
    if hasattr(mi.pubdate, 'isoformat'):
        factory(DC('date'), isoformat(mi.pubdate))
    if hasattr(mi, 'category') and mi.category:
        factory(DC('type'), mi.category)
    if mi.comments:
        factory(DC('description'), clean_ascii_chars(mi.comments))
    if mi.publisher:
        factory(DC('publisher'), mi.publisher)
    for key, val in mi.get_identifiers().items():
        factory(DC('identifier'), val, scheme=icu_upper(key))
    if mi.rights:
        factory(DC('rights'), mi.rights)
    for lang in mi.languages:
        if not lang or lang.lower() == 'und':
            continue
        factory(DC('language'), lang)
    if mi.tags:
        for tag in mi.tags:
            factory(DC('subject'), tag)

    def meta(n, c):
        return factory('meta', name='calibre:' + n, content=c)
    if not mi.is_null('link_maps'):
        meta('link_maps', dump_dict(mi.link_maps))
    if mi.series:
        meta('series', mi.series)
    if mi.series_index is not None:
        meta('series_index', mi.format_series_index())
    if mi.rating is not None:
        meta('rating', str(mi.rating))
    if hasattr(mi.timestamp, 'isoformat'):
        meta('timestamp', isoformat(mi.timestamp))
    if mi.publication_type:
        meta('publication_type', mi.publication_type)
    if mi.title_sort:
        meta('title_sort', mi.title_sort)
    if mi.user_categories:
        meta('user_categories', dump_dict(mi.user_categories))

    serialize_user_metadata(metadata, mi.get_all_user_metadata(False))
    all_annotations = getattr(mi, 'all_annotations', None)
    if all_annotations:
        serialize_annotations(metadata, all_annotations)

    metadata[-1].tail = '\n' +(' '*4)

    if mi.cover:
        if not isinstance(mi.cover, str):
            mi.cover = mi.cover.decode(filesystem_encoding)
        guide.text = '\n'+(' '*8)
        r = guide.makeelement(OPF('reference'),
                attrib={'type':'cover', 'title':_('Cover'), 'href':mi.cover})
        r.tail = '\n' +(' '*4)
        guide.append(r)
    if pretty_print_opf:
        _pretty_print(root)

    return etree.tostring(root, pretty_print=True, encoding='utf-8',
            xml_declaration=True) if as_string else root


def test_m2o():
    from calibre.utils.date import now as nowf
    mi = MetaInformation('test & title', ['a"1', "a'2"])
    mi.title_sort = 'a\'"b'
    mi.author_sort = 'author sort'
    mi.pubdate = nowf()
    mi.language = 'en'
    mi.comments = 'what a fun book\n\n'
    mi.publisher = 'publisher'
    mi.set_identifiers({'isbn':'booo', 'dummy':'dummy'})
    mi.tags = ['a', 'b']
    mi.series = 's"c\'l&<>'
    mi.series_index = 3.34
    mi.rating = 3
    mi.timestamp = nowf()
    mi.publication_type = 'ooooo'
    mi.rights = 'yes'
    mi.cover = os.path.abspath('asd.jpg')
    opf = metadata_to_opf(mi)
    print(opf)
    newmi = MetaInformation(OPF(io.BytesIO(opf)))
    for attr in ('author_sort', 'title_sort', 'comments',
                    'publisher', 'series', 'series_index', 'rating',
                    'isbn', 'tags', 'cover_data', 'application_id',
                    'language', 'cover',
                    'book_producer', 'timestamp',
                    'pubdate', 'rights', 'publication_type'):
        o, n = getattr(mi, attr), getattr(newmi, attr)
        if o != n and o.strip() != n.strip():
            print('FAILED:', attr, getattr(mi, attr), '!=', getattr(newmi, attr))
    if mi.get_identifiers() != newmi.get_identifiers():
        print('FAILED:', 'identifiers', mi.get_identifiers(), end=' ')
        print('!=', newmi.get_identifiers())


def suite():
    import unittest

    class OPFTest(unittest.TestCase):

        def setUp(self):
            self.stream = io.BytesIO(
    b'''\
    <?xml version="1.0"  encoding="UTF-8"?>
    <package version="2.0" xmlns="http://www.idpf.org/2007/opf" >
    <metadata xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:opf="http://www.idpf.org/2007/opf">
        <dc:title opf:file-as="Wow">A Cool &amp; &copy; &#223; Title</dc:title>
        <creator opf:role="aut" file-as="Monkey">Monkey Kitchen</creator>
        <creator opf:role="aut">Next</creator>
        <dc:subject>One</dc:subject><dc:subject>Two</dc:subject>
        <dc:identifier scheme="ISBN">123456789</dc:identifier>
        <dc:identifier scheme="dummy">dummy</dc:identifier>
        <meta name="calibre:series" content="A one book series" />
        <meta name="calibre:rating" content="4"/>
        <meta name="calibre:publication_type" content="test"/>
        <meta name="calibre:series_index" content="2.5" />
    </metadata>
    <manifest>
        <item id="1" href="a%20%7E%20b" media-type="text/txt" />
    </manifest>
    </package>
    '''
            )
            self.opf = OPF(self.stream, os.getcwd())

        def testReading(self, opf=None):
            if opf is None:
                opf = self.opf
            self.assertEqual(opf.title, 'A Cool & \xa9 \xdf Title')
            self.assertEqual(opf.authors, 'Monkey Kitchen,Next'.split(','))
            self.assertEqual(opf.author_sort, 'Monkey')
            self.assertEqual(opf.title_sort, 'Wow')
            self.assertEqual(opf.tags, ['One', 'Two'])
            self.assertEqual(opf.isbn, '123456789')
            self.assertEqual(opf.series, 'A one book series')
            self.assertEqual(opf.series_index, 2.5)
            self.assertEqual(opf.rating, 4)
            self.assertEqual(opf.publication_type, 'test')
            self.assertEqual(list(opf.itermanifest())[0].get('href'), 'a ~ b')
            self.assertEqual(opf.get_identifiers(), {'isbn':'123456789',
                'dummy':'dummy'})

        def testWriting(self):
            for test in [('title', 'New & Title'), ('authors', ['One', 'Two']),
                        ('author_sort', 'Kitchen'), ('tags', ['Three']),
                        ('isbn', 'a'), ('rating', 3), ('series_index', 1),
                        ('title_sort', 'ts')]:
                setattr(self.opf, *test)
                attr, val = test
                self.assertEqual(getattr(self.opf, attr), val)

            self.opf.render()

        def testCreator(self):
            opf = OPFCreator(os.getcwd(), self.opf)
            buf = io.BytesIO()
            opf.render(buf)
            raw = buf.getvalue()
            self.testReading(opf=OPF(io.BytesIO(raw), os.getcwd()))

        def testSmartUpdate(self):
            self.opf.smart_update(MetaInformation(self.opf))
            self.testReading()

    return unittest.TestLoader().loadTestsFromTestCase(OPFTest)


def test():
    import unittest
    unittest.TextTestRunner(verbosity=2).run(suite())


def test_user_metadata():
    mi = Metadata('Test title', ['test author1', 'test author2'])
    um = {
        '#myseries': {'#value#': 'test series\xe4', 'datatype':'text',
            'is_multiple': None, 'name': 'My Series'},
        '#myseries_index': {'#value#': 2.45, 'datatype': 'float',
            'is_multiple': None},
        '#mytags': {'#value#':['t1','t2','t3'], 'datatype':'text',
            'is_multiple': '|', 'name': 'My Tags'}
        }
    mi.set_all_user_metadata(um)
    raw = metadata_to_opf(mi)
    opfc = OPFCreator(os.getcwd(), other=mi)
    out = io.BytesIO()
    opfc.render(out)
    raw2 = out.getvalue()
    f = io.BytesIO(raw)
    opf = OPF(f)
    f2 = io.BytesIO(raw2)
    opf2 = OPF(f2)
    assert um == opf._user_metadata_
    assert um == opf2._user_metadata_
    print(opf.render())


if __name__ == '__main__':
    # test_user_metadata()
    test_m2o()
    test()
